from django.shortcuts import render
from django.http import HttpResponse
import jieba
import logging
from collections import defaultdict

jieba.setLogLevel(logging.INFO)

from article.models import Article
from .models import PostingList


# Tokens that are not treated as keywords: clean_no_key_words() removes any
# token found in this list before the inverted index is built.
no_key_words = ["中", "一个", "是", "还是", "的", "和",
                "-", "大", "?", '—', '----', "如此",
                "简单", "竟然", "通过", "如何", "判断"]


def clean_no_key_words(word_list):
    """
    Remove non-keyword tokens.

    :param word_list: iterable of tokens to filter.
    :return: a new list holding, in their original order, the tokens that
             do not appear in ``no_key_words``.
    """
    return [token for token in word_list if token not in no_key_words]

def gen_inverted_index(request):
    """
    Rebuild the inverted index (word -> article ids) from all article titles.

    Each title is segmented with jieba; tokens listed in ``no_key_words`` are
    dropped via ``clean_no_key_words`` and whitespace-only tokens are skipped.
    For every remaining word one ``PostingList`` row is stored whose
    ``document`` field holds the ids of the matching articles joined by "|".

    The existing rows are deleted and the new ones inserted inside a single
    transaction, so calling this view repeatedly replaces the index instead
    of piling up duplicate rows.

    :param request: the incoming HTTP request (not inspected).
    :return: an ``HttpResponse`` carrying a success message.
    """
    # Imported locally so this view carries its own dependency.
    from django.db import transaction

    postings = defaultdict(list)
    for article in Article.objects.all():
        # Tolerate a missing title instead of crashing on ``None.strip``.
        title = (article.title or "").strip(" -？")
        # dict.fromkeys de-duplicates the tokens of one title while keeping
        # their first-seen order, so the stored rows come out in a stable order.
        tokens = list(dict.fromkeys(jieba.cut(title, cut_all=False)))
        for word in clean_no_key_words(tokens):
            # jieba yields the spaces inside a title as tokens of their own;
            # they are not searchable words.
            if not word.strip():
                continue
            postings[word].append(str(article.id))

    rows = [
        PostingList(word=word, document="|".join(article_ids))
        for word, article_ids in postings.items()
    ]

    # Replace the whole index atomically: a failure while inserting rolls the
    # delete back, so the previous index is never lost.
    # NOTE(review): bulk_create does not call PostingList.save() or send
    # pre/post-save signals — confirm the model does not rely on them.
    with transaction.atomic():
        PostingList.objects.all().delete()
        PostingList.objects.bulk_create(rows)

    return HttpResponse("倒排索引创建成功！")








